import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import util.MyExcel;
import util.MyFile;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;


/**
 * Created by hanrchen on 2016/7/13.
 *
 * <p>Loads sheet {@code SHEET_NUM} of the workbook located by
 * {@code MyFile.findResourceFile(XLS_FILE_NAME)} and, for every data row, appends a
 * cell holding the number of space-separated words found in that row's
 * "CustomerSearchTerm" column. The modified workbook is written back to the file
 * it was loaded from.
 *
 * <p>Typical use: call {@link #loadData()} first, then
 * {@link #countCustomerSearchTermsLength()}.
 */
public class DataPreprocess {

    final static String XLS_FILE_NAME = "data_month01.xls";
    /** The workbook file; set by {@link #loadData()} and overwritten when results are saved. */
    public File _source_xls_File;
    /** Index of the sheet that is processed. */
    final int SHEET_NUM = 0;

    //workbook
    private HSSFWorkbook _wbx;
    //sheet
    private HSSFSheet _sheetx;
    //rows, indexed by sheet row number; an entry is null when the sheet has no row at that index
    private HSSFRow[] _rows;
    //number of row slots in _rows (last row index + 1)
    private int ROWS_NUM;
    //column names
    private String[] colNames;
    //last cell index of the header row, as reported by MyExcel.getLastCellNum
    private int lastCellIndex;


    //index of the "CustomerSearchTerm" column
    private int _colIndex_CustomerSearchTerm;
    //index of the column that receives the word count
    private int CUST_TERM_LEN;

    public DataPreprocess() {
    }

    /**
     * Opens the workbook, caches the rows of sheet {@code SHEET_NUM} and reads the
     * header row (row 0).
     *
     * @throws Exception if the sheet contains no rows or has no header row, or if
     *                   one of the helper calls fails
     */
    public void loadData() throws Exception {
        this._source_xls_File = MyFile.findResourceFile(XLS_FILE_NAME);
        //get workbook
        this._wbx = MyExcel.readFile(this._source_xls_File);

        //get sheet
        this._sheetx = this._wbx.getSheetAt(this.SHEET_NUM);

        //check row number
        if (this._sheetx.getPhysicalNumberOfRows() < 1) {
            throw new Exception("Excel "+ this._source_xls_File.getAbsolutePath() +" sheet " + this.SHEET_NUM + " has no data\n");
        }

        // The physical row count skips undefined rows, so it cannot be used as an
        // upper bound for getRow(i): with gaps in the sheet it would drop trailing
        // rows. Index by the last row number instead and tolerate null entries.
        this.ROWS_NUM = this._sheetx.getLastRowNum() + 1;
        this._rows = new HSSFRow[this.ROWS_NUM];
        for (int i = 0; i < this.ROWS_NUM; i++) {
            this._rows[i] = this._sheetx.getRow(i);
        }

        HSSFRow headerRow = this._rows[0];
        if (headerRow == null) {
            throw new Exception("Excel " + this._source_xls_File.getAbsolutePath() + " sheet " + this.SHEET_NUM + " has no header row\n");
        }

        // read excel file rows
        this.colNames = MyExcel.getColumnNames(headerRow); // get column header names
        this.lastCellIndex = MyExcel.getLastCellNum(headerRow); // get last cell index

        // The word count goes into the column right after the reported last cell.
        // NOTE(review): if MyExcel.getLastCellNum mirrors POI's getLastCellNum
        // (which is already last index + 1), this leaves one empty column - confirm.
        this.CUST_TERM_LEN = this.lastCellIndex + 1;
    }


    /**
     * Remove duplicated customer search terms.
     *
     * <p>Not implemented yet: this method currently does nothing.
     */
    public void distinctCustomerSearchTerms(){

    }

    /**
     * Writes, for every data row, the number of space-separated words of its
     * "CustomerSearchTerm" cell into column {@code CUST_TERM_LEN}, then saves the
     * workbook back to {@link #_source_xls_File}.
     *
     * <p>Rows missing from the sheet are skipped. An empty search term counts as
     * zero words.
     *
     * @throws IOException           if the workbook cannot be written
     * @throws IllegalStateException if {@link #loadData()} has not completed
     *                               successfully beforehand
     */
    public void countCustomerSearchTermsLength() throws IOException {
        if (this._rows == null || this.colNames == null) {
            throw new IllegalStateException("loadData() must be called before countCustomerSearchTermsLength()");
        }

        //get customer keywords column
        // NOTE(review): behaviour when the column is absent depends on
        // MyExcel.getColumnNameIndex - verify what it returns in that case.
        this._colIndex_CustomerSearchTerm = MyExcel.getColumnNameIndex(this.colNames, "CustomerSearchTerm");

        //row 0 is head row, so start from row 1
        for (int i = 1; i < this._rows.length; i++) {
            HSSFRow row = this._rows[i];
            if (row == null) {
                continue; // gap in the sheet: nothing to count, nothing to write
            }
            String term = MyExcel.getCellValue(row.getCell(this._colIndex_CustomerSearchTerm));
            row.createCell(this.CUST_TERM_LEN).setCellValue(countWords(term));
        }

        //write to the xls; try-with-resources closes the stream even if write() fails
        try (FileOutputStream out = new FileOutputStream(this._source_xls_File)) {
            this._wbx.write(out);
        }
    }

    /**
     * Counts the words of a search term, where words are separated by one or more
     * space characters (U+0020).
     *
     * @param term the raw cell text, may be {@code null}
     * @return the number of words; 0 for {@code null} or blank input
     */
    private static int countWords(String term) {
        if (term == null) {
            return 0;
        }
        String trimmed = term.trim();
        if (trimmed.isEmpty()) {
            // "".split(...) yields one empty token, which would be miscounted as a word
            return 0;
        }
        // "+" collapses runs of spaces so "a  b" counts two words, not three
        return trimmed.split("\\u0020+").length;
    }

}
